#ifndef CUFFTDX_FFT_6561_FP64_INV_PTX_HPP
#define CUFFTDX_FFT_6561_FP64_INV_PTX_HPP



// Explicit specialization cufftdx_private_function<1171, double, 1>.
//
// The whole body is one inline-PTX statement that transforms nine
// double-precision complex values held by the calling thread, exchanging
// intermediate results with other threads through shared memory.
// NOTE(review): the include guard of this file suggests this is a kernel for a
// size-6561 FP64 inverse FFT; confirm against the generator / dispatch table.
//
// Parameters:
//   rmem - per-thread array of complex<double>. rmem[0..8] (.x and .y) are read
//          as inputs and overwritten with the results.
//   smem - 32-bit shared-memory address used as the base for st.shared /
//          ld.shared. The PTX adds tid.y * 104976 and (tid.x / 729) * 104976
//          bytes to it, then accesses a 104976-byte region from that point
//          (nine 16-byte slots per thread, indexed by tid.x % 729).
//
// Structure of the PTX, in program order:
//   1. A butterfly pass over the nine register inputs. The constants are 0.5
//      and -0.8660254 (about -sqrt(3)/2), plus four fixed cos/sin-like pairs;
//      this looks like a radix-9 butterfly built from radix-3 steps.
//   2. Three rounds that each:
//        - load two complex entries from a lookup table (lut_dp_9_6561 indexed
//          by tid.x % 729, then lut_dp_9_729 indexed by (tid.x % 729) / 9,
//          then lut_dp_9_81 indexed by (tid.x % 729) / 81) and derive six
//          further factors from them by complex multiplication (presumably
//          twiddle powers);
//        - multiply eight of the nine pass outputs by those factors;
//        - barrier.sync 0, store nine values to shared memory, barrier.sync 0;
//        - reload nine values at a stride of 11664 bytes and repeat the
//          butterfly pass.
//   3. The last butterfly's results are written to output operands %0..%17.
//
// The statement contains six `barrier.sync 0` instructions, so every thread of
// the block is expected to execute this function together (no divergent calls).
// The integer divisions by 729, 9 and 81 are done with multiply-high/shift
// sequences (mul.wide.u32 + shr.u64).
template<> __forceinline__ __device__ void cufftdx_private_function<1171, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

// Single asm statement: registers are declared inside the PTX scope `{ ... }`.
asm volatile (R"({
.reg .b32 r<24>;
.reg .f64 fd<843>;
.reg .b64 rd<17>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 104976, r2;
add.f64 fd37, %30, %38;
add.f64 fd38, %22, fd37;
add.f64 fd39, %31, %39;
add.f64 fd40, %23, fd39;
mul.f64 fd41, fd37, 0d3FE0000000000000;
sub.f64 fd42, %22, fd41;
sub.f64 fd43, %31, %39;
mul.f64 fd44, fd43, 0dBFEBB67AE8584CAA;
add.f64 fd45, fd44, fd42;
sub.f64 fd46, fd42, fd44;
mul.f64 fd47, fd39, 0d3FE0000000000000;
sub.f64 fd48, %23, fd47;
sub.f64 fd49, %30, %38;
mul.f64 fd50, fd49, 0dBFEBB67AE8584CAA;
sub.f64 fd51, fd48, fd50;
add.f64 fd52, fd50, fd48;
add.f64 fd53, %32, %40;
add.f64 fd54, %24, fd53;
add.f64 fd55, %34, %42;
add.f64 fd56, %26, fd55;
mul.f64 fd57, fd53, 0d3FE0000000000000;
sub.f64 fd58, %24, fd57;
sub.f64 fd59, %34, %42;
mul.f64 fd60, fd59, 0dBFEBB67AE8584CAA;
add.f64 fd61, fd60, fd58;
sub.f64 fd62, fd58, fd60;
mul.f64 fd63, fd55, 0d3FE0000000000000;
sub.f64 fd64, %26, fd63;
sub.f64 fd65, %32, %40;
mul.f64 fd66, fd65, 0dBFEBB67AE8584CAA;
sub.f64 fd67, fd64, fd66;
add.f64 fd68, fd66, fd64;
add.f64 fd69, %35, %43;
add.f64 fd70, %27, fd69;
add.f64 fd71, %37, %44;
add.f64 fd72, %29, fd71;
mul.f64 fd73, fd69, 0d3FE0000000000000;
sub.f64 fd74, %27, fd73;
sub.f64 fd75, %37, %44;
mul.f64 fd76, fd75, 0dBFEBB67AE8584CAA;
add.f64 fd77, fd76, fd74;
sub.f64 fd78, fd74, fd76;
mul.f64 fd79, fd71, 0d3FE0000000000000;
sub.f64 fd80, %29, fd79;
sub.f64 fd81, %35, %43;
mul.f64 fd82, fd81, 0dBFEBB67AE8584CAA;
sub.f64 fd83, fd80, fd82;
add.f64 fd84, fd82, fd80;
mov.u32 r4, %tid.x;
mul.f64 fd85, fd61, 0d3FE8836FA2CF5039;
mul.f64 fd86, fd67, 0d3FE491B7523C161D;
sub.f64 fd87, fd85, fd86;
mul.f64 fd88, fd67, 0d3FE8836FA2CF5039;
fma.rn.f64 fd89, fd61, 0d3FE491B7523C161D, fd88;
mul.f64 fd90, fd77, 0d3FC63A1A7E0B738A;
mul.f64 fd91, fd83, 0d3FEF838B8C811C17;
sub.f64 fd92, fd90, fd91;
mul.f64 fd93, fd83, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd94, fd77, 0d3FEF838B8C811C17, fd93;
mul.f64 fd95, fd62, 0d3FC63A1A7E0B738A;
mul.f64 fd96, fd68, 0d3FEF838B8C811C17;
sub.f64 fd97, fd95, fd96;
mul.f64 fd98, fd68, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd99, fd62, 0d3FEF838B8C811C17, fd98;
mul.f64 fd100, fd78, 0dBFEE11F642522D1C;
mul.f64 fd101, fd84, 0d3FD5E3A8748A0BF5;
sub.f64 fd102, fd100, fd101;
mul.f64 fd103, fd84, 0dBFEE11F642522D1C;
fma.rn.f64 fd104, fd78, 0d3FD5E3A8748A0BF5, fd103;
add.f64 fd105, fd54, fd70;
add.f64 fd106, fd56, fd72;
mul.f64 fd107, fd105, 0d3FE0000000000000;
sub.f64 fd108, fd38, fd107;
sub.f64 fd109, fd56, fd72;
mul.f64 fd110, fd109, 0dBFEBB67AE8584CAA;
add.f64 fd111, fd110, fd108;
sub.f64 fd112, fd108, fd110;
mul.f64 fd113, fd106, 0d3FE0000000000000;
sub.f64 fd114, fd40, fd113;
sub.f64 fd115, fd54, fd70;
mul.f64 fd116, fd115, 0dBFEBB67AE8584CAA;
sub.f64 fd117, fd114, fd116;
add.f64 fd118, fd116, fd114;
add.f64 fd119, fd87, fd92;
add.f64 fd120, fd45, fd119;
add.f64 fd121, fd89, fd94;
add.f64 fd122, fd51, fd121;
mul.f64 fd123, fd119, 0d3FE0000000000000;
sub.f64 fd124, fd45, fd123;
sub.f64 fd125, fd89, fd94;
mul.f64 fd126, fd125, 0dBFEBB67AE8584CAA;
add.f64 fd127, fd126, fd124;
sub.f64 fd128, fd124, fd126;
mul.f64 fd129, fd121, 0d3FE0000000000000;
sub.f64 fd130, fd51, fd129;
sub.f64 fd131, fd87, fd92;
mul.f64 fd132, fd131, 0dBFEBB67AE8584CAA;
sub.f64 fd133, fd130, fd132;
add.f64 fd134, fd132, fd130;
add.f64 fd135, fd97, fd102;
add.f64 fd136, fd46, fd135;
add.f64 fd137, fd99, fd104;
add.f64 fd138, fd52, fd137;
mul.f64 fd139, fd135, 0d3FE0000000000000;
sub.f64 fd140, fd46, fd139;
sub.f64 fd141, fd99, fd104;
mul.f64 fd142, fd141, 0dBFEBB67AE8584CAA;
add.f64 fd143, fd142, fd140;
sub.f64 fd144, fd140, fd142;
mul.f64 fd145, fd137, 0d3FE0000000000000;
sub.f64 fd146, fd52, fd145;
sub.f64 fd147, fd97, fd102;
mul.f64 fd148, fd147, 0dBFEBB67AE8584CAA;
sub.f64 fd149, fd146, fd148;
add.f64 fd150, fd148, fd146;
mul.wide.u32 rd2, r4, 1508246403;
shr.u64 rd3, rd2, 40;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 729;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 104976, r3;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd151, fd152}, [rd6];
mul.f64 fd155, fd122, fd152;
mul.f64 fd156, fd120, fd152;
mul.f64 fd157, fd151, fd122;
mul.f64 fd158, fd151, fd151;
mul.f64 fd159, fd152, fd152;
sub.f64 fd160, fd158, fd159;
mul.f64 fd161, fd152, fd151;
fma.rn.f64 fd162, fd152, fd151, fd161;
mul.f64 fd163, fd138, fd162;
mul.f64 fd164, fd136, fd162;
mul.f64 fd165, fd160, fd138;
mul.f64 fd166, fd151, fd160;
mul.f64 fd167, fd152, fd162;
sub.f64 fd168, fd166, fd167;
mul.f64 fd169, fd151, fd162;
fma.rn.f64 fd170, fd152, fd160, fd169;
mul.f64 fd171, fd117, fd170;
mul.f64 fd172, fd111, fd170;
mul.f64 fd173, fd168, fd117;
mul.f64 fd174, fd151, fd168;
mul.f64 fd175, fd152, fd170;
sub.f64 fd176, fd174, fd175;
mul.f64 fd177, fd151, fd170;
fma.rn.f64 fd178, fd152, fd168, fd177;
mul.f64 fd179, fd133, fd178;
mul.f64 fd180, fd127, fd178;
mul.f64 fd181, fd176, fd133;
ld.global.v2.f64 {fd182, fd183}, [rd6+11664];
mul.f64 fd186, fd149, fd183;
mul.f64 fd187, fd143, fd183;
mul.f64 fd188, fd182, fd149;
mul.f64 fd189, fd151, fd182;
mul.f64 fd190, fd152, fd183;
sub.f64 fd191, fd189, fd190;
mul.f64 fd192, fd151, fd183;
fma.rn.f64 fd193, fd152, fd182, fd192;
mul.f64 fd194, fd118, fd193;
mul.f64 fd195, fd112, fd193;
mul.f64 fd196, fd191, fd118;
mul.f64 fd197, fd151, fd191;
mul.f64 fd198, fd152, fd193;
sub.f64 fd199, fd197, fd198;
mul.f64 fd200, fd151, fd193;
fma.rn.f64 fd201, fd152, fd191, fd200;
mul.f64 fd202, fd134, fd201;
mul.f64 fd203, fd128, fd201;
mul.f64 fd204, fd199, fd134;
mul.f64 fd205, fd151, fd199;
mul.f64 fd206, fd152, fd201;
sub.f64 fd207, fd205, fd206;
mul.f64 fd208, fd151, fd201;
fma.rn.f64 fd209, fd152, fd199, fd208;
mul.f64 fd210, fd150, fd209;
mul.f64 fd211, fd144, fd209;
mul.f64 fd212, fd207, fd150;
barrier.sync 0;
mad.lo.s32 r9, r7, 144, r8;
add.f64 fd213, fd40, fd106;
add.f64 fd214, fd38, fd105;
st.shared.v2.f64 [r9], {fd214, fd213};
fma.rn.f64 fd215, fd151, fd120, fd155;
sub.f64 fd216, fd157, fd156;
st.shared.v2.f64 [r9+16], {fd215, fd216};
fma.rn.f64 fd217, fd160, fd136, fd163;
sub.f64 fd218, fd165, fd164;
st.shared.v2.f64 [r9+32], {fd217, fd218};
sub.f64 fd219, fd173, fd172;
fma.rn.f64 fd220, fd168, fd111, fd171;
st.shared.v2.f64 [r9+48], {fd220, fd219};
fma.rn.f64 fd221, fd176, fd127, fd179;
sub.f64 fd222, fd181, fd180;
st.shared.v2.f64 [r9+64], {fd221, fd222};
fma.rn.f64 fd223, fd182, fd143, fd186;
sub.f64 fd224, fd188, fd187;
st.shared.v2.f64 [r9+80], {fd223, fd224};
fma.rn.f64 fd225, fd191, fd112, fd194;
sub.f64 fd226, fd196, fd195;
st.shared.v2.f64 [r9+96], {fd225, fd226};
fma.rn.f64 fd227, fd199, fd128, fd202;
sub.f64 fd228, fd204, fd203;
st.shared.v2.f64 [r9+112], {fd227, fd228};
fma.rn.f64 fd229, fd207, fd144, fd210;
sub.f64 fd230, fd212, fd211;
st.shared.v2.f64 [r9+128], {fd229, fd230};
barrier.sync 0;
shl.b32 r10, r7, 7;
sub.s32 r11, r9, r10;
ld.shared.v2.f64 {fd231, fd232}, [r11];
ld.shared.v2.f64 {fd235, fd236}, [r11+11664];
ld.shared.v2.f64 {fd239, fd240}, [r11+23328];
ld.shared.v2.f64 {fd243, fd244}, [r11+34992];
ld.shared.v2.f64 {fd247, fd248}, [r11+46656];
ld.shared.v2.f64 {fd251, fd252}, [r11+58320];
ld.shared.v2.f64 {fd255, fd256}, [r11+69984];
ld.shared.v2.f64 {fd259, fd260}, [r11+81648];
ld.shared.v2.f64 {fd263, fd264}, [r11+93312];
add.f64 fd267, fd243, fd255;
add.f64 fd268, fd231, fd267;
add.f64 fd269, fd244, fd256;
add.f64 fd270, fd232, fd269;
mul.f64 fd271, fd267, 0d3FE0000000000000;
sub.f64 fd272, fd231, fd271;
sub.f64 fd273, fd244, fd256;
mul.f64 fd274, fd273, 0dBFEBB67AE8584CAA;
add.f64 fd275, fd274, fd272;
sub.f64 fd276, fd272, fd274;
mul.f64 fd277, fd269, 0d3FE0000000000000;
sub.f64 fd278, fd232, fd277;
sub.f64 fd279, fd243, fd255;
mul.f64 fd280, fd279, 0dBFEBB67AE8584CAA;
sub.f64 fd281, fd278, fd280;
add.f64 fd282, fd280, fd278;
add.f64 fd283, fd247, fd259;
add.f64 fd284, fd235, fd283;
add.f64 fd285, fd248, fd260;
add.f64 fd286, fd236, fd285;
mul.f64 fd287, fd283, 0d3FE0000000000000;
sub.f64 fd288, fd235, fd287;
sub.f64 fd289, fd248, fd260;
mul.f64 fd290, fd289, 0dBFEBB67AE8584CAA;
add.f64 fd291, fd290, fd288;
sub.f64 fd292, fd288, fd290;
mul.f64 fd293, fd285, 0d3FE0000000000000;
sub.f64 fd294, fd236, fd293;
sub.f64 fd295, fd247, fd259;
mul.f64 fd296, fd295, 0dBFEBB67AE8584CAA;
sub.f64 fd297, fd294, fd296;
add.f64 fd298, fd296, fd294;
add.f64 fd299, fd251, fd263;
add.f64 fd300, fd239, fd299;
add.f64 fd301, fd252, fd264;
add.f64 fd302, fd240, fd301;
mul.f64 fd303, fd299, 0d3FE0000000000000;
sub.f64 fd304, fd239, fd303;
sub.f64 fd305, fd252, fd264;
mul.f64 fd306, fd305, 0dBFEBB67AE8584CAA;
add.f64 fd307, fd306, fd304;
sub.f64 fd308, fd304, fd306;
mul.f64 fd309, fd301, 0d3FE0000000000000;
sub.f64 fd310, fd240, fd309;
sub.f64 fd311, fd251, fd263;
mul.f64 fd312, fd311, 0dBFEBB67AE8584CAA;
sub.f64 fd313, fd310, fd312;
add.f64 fd314, fd312, fd310;
mul.f64 fd315, fd291, 0d3FE8836FA2CF5039;
mul.f64 fd316, fd297, 0d3FE491B7523C161D;
sub.f64 fd317, fd315, fd316;
mul.f64 fd318, fd297, 0d3FE8836FA2CF5039;
fma.rn.f64 fd319, fd291, 0d3FE491B7523C161D, fd318;
mul.f64 fd320, fd307, 0d3FC63A1A7E0B738A;
mul.f64 fd321, fd313, 0d3FEF838B8C811C17;
sub.f64 fd322, fd320, fd321;
mul.f64 fd323, fd313, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd324, fd307, 0d3FEF838B8C811C17, fd323;
mul.f64 fd325, fd292, 0d3FC63A1A7E0B738A;
mul.f64 fd326, fd298, 0d3FEF838B8C811C17;
sub.f64 fd327, fd325, fd326;
mul.f64 fd328, fd298, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd329, fd292, 0d3FEF838B8C811C17, fd328;
mul.f64 fd330, fd308, 0dBFEE11F642522D1C;
mul.f64 fd331, fd314, 0d3FD5E3A8748A0BF5;
sub.f64 fd332, fd330, fd331;
mul.f64 fd333, fd314, 0dBFEE11F642522D1C;
fma.rn.f64 fd334, fd308, 0d3FD5E3A8748A0BF5, fd333;
add.f64 fd335, fd284, fd300;
add.f64 fd336, fd286, fd302;
mul.f64 fd337, fd335, 0d3FE0000000000000;
sub.f64 fd338, fd268, fd337;
sub.f64 fd339, fd286, fd302;
mul.f64 fd340, fd339, 0dBFEBB67AE8584CAA;
add.f64 fd341, fd340, fd338;
sub.f64 fd342, fd338, fd340;
mul.f64 fd343, fd336, 0d3FE0000000000000;
sub.f64 fd344, fd270, fd343;
sub.f64 fd345, fd284, fd300;
mul.f64 fd346, fd345, 0dBFEBB67AE8584CAA;
sub.f64 fd347, fd344, fd346;
add.f64 fd348, fd346, fd344;
add.f64 fd349, fd317, fd322;
add.f64 fd350, fd275, fd349;
add.f64 fd351, fd319, fd324;
add.f64 fd352, fd281, fd351;
mul.f64 fd353, fd349, 0d3FE0000000000000;
sub.f64 fd354, fd275, fd353;
sub.f64 fd355, fd319, fd324;
mul.f64 fd356, fd355, 0dBFEBB67AE8584CAA;
add.f64 fd357, fd356, fd354;
sub.f64 fd358, fd354, fd356;
mul.f64 fd359, fd351, 0d3FE0000000000000;
sub.f64 fd360, fd281, fd359;
sub.f64 fd361, fd317, fd322;
mul.f64 fd362, fd361, 0dBFEBB67AE8584CAA;
sub.f64 fd363, fd360, fd362;
add.f64 fd364, fd362, fd360;
add.f64 fd365, fd327, fd332;
add.f64 fd366, fd276, fd365;
add.f64 fd367, fd329, fd334;
add.f64 fd368, fd282, fd367;
mul.f64 fd369, fd365, 0d3FE0000000000000;
sub.f64 fd370, fd276, fd369;
sub.f64 fd371, fd329, fd334;
mul.f64 fd372, fd371, 0dBFEBB67AE8584CAA;
add.f64 fd373, fd372, fd370;
sub.f64 fd374, fd370, fd372;
mul.f64 fd375, fd367, 0d3FE0000000000000;
sub.f64 fd376, fd282, fd375;
sub.f64 fd377, fd327, fd332;
mul.f64 fd378, fd377, 0dBFEBB67AE8584CAA;
sub.f64 fd379, fd376, fd378;
add.f64 fd380, fd378, fd376;
mul.wide.u32 rd7, r7, 954437177;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 9;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %20;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd381, fd382}, [rd11];
mul.f64 fd385, fd352, fd382;
mul.f64 fd386, fd350, fd382;
mul.f64 fd387, fd381, fd352;
mul.f64 fd388, fd381, fd381;
mul.f64 fd389, fd382, fd382;
sub.f64 fd390, fd388, fd389;
mul.f64 fd391, fd382, fd381;
fma.rn.f64 fd392, fd382, fd381, fd391;
mul.f64 fd393, fd368, fd392;
mul.f64 fd394, fd366, fd392;
mul.f64 fd395, fd390, fd368;
mul.f64 fd396, fd381, fd390;
mul.f64 fd397, fd382, fd392;
sub.f64 fd398, fd396, fd397;
mul.f64 fd399, fd381, fd392;
fma.rn.f64 fd400, fd382, fd390, fd399;
mul.f64 fd401, fd347, fd400;
mul.f64 fd402, fd341, fd400;
mul.f64 fd403, fd398, fd347;
mul.f64 fd404, fd381, fd398;
mul.f64 fd405, fd382, fd400;
sub.f64 fd406, fd404, fd405;
mul.f64 fd407, fd381, fd400;
fma.rn.f64 fd408, fd382, fd398, fd407;
mul.f64 fd409, fd363, fd408;
mul.f64 fd410, fd357, fd408;
mul.f64 fd411, fd406, fd363;
ld.global.v2.f64 {fd412, fd413}, [rd11+1296];
mul.f64 fd416, fd379, fd413;
mul.f64 fd417, fd373, fd413;
mul.f64 fd418, fd412, fd379;
mul.f64 fd419, fd381, fd412;
mul.f64 fd420, fd382, fd413;
sub.f64 fd421, fd419, fd420;
mul.f64 fd422, fd381, fd413;
fma.rn.f64 fd423, fd382, fd412, fd422;
mul.f64 fd424, fd348, fd423;
mul.f64 fd425, fd342, fd423;
mul.f64 fd426, fd421, fd348;
mul.f64 fd427, fd381, fd421;
mul.f64 fd428, fd382, fd423;
sub.f64 fd429, fd427, fd428;
mul.f64 fd430, fd381, fd423;
fma.rn.f64 fd431, fd382, fd421, fd430;
mul.f64 fd432, fd364, fd431;
mul.f64 fd433, fd358, fd431;
mul.f64 fd434, fd429, fd364;
mul.f64 fd435, fd381, fd429;
mul.f64 fd436, fd382, fd431;
sub.f64 fd437, fd435, fd436;
mul.f64 fd438, fd381, fd431;
fma.rn.f64 fd439, fd382, fd429, fd438;
mul.f64 fd440, fd380, fd439;
mul.f64 fd441, fd374, fd439;
mul.f64 fd442, fd437, fd380;
shl.b32 r15, r14, 4;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 1296, r16;
add.f64 fd443, fd270, fd336;
add.f64 fd444, fd268, fd335;
st.shared.v2.f64 [r17], {fd444, fd443};
fma.rn.f64 fd445, fd381, fd350, fd385;
sub.f64 fd446, fd387, fd386;
st.shared.v2.f64 [r17+144], {fd445, fd446};
fma.rn.f64 fd447, fd390, fd366, fd393;
sub.f64 fd448, fd395, fd394;
st.shared.v2.f64 [r17+288], {fd447, fd448};
fma.rn.f64 fd449, fd398, fd341, fd401;
sub.f64 fd450, fd403, fd402;
st.shared.v2.f64 [r17+432], {fd449, fd450};
fma.rn.f64 fd451, fd406, fd357, fd409;
sub.f64 fd452, fd411, fd410;
st.shared.v2.f64 [r17+576], {fd451, fd452};
fma.rn.f64 fd453, fd412, fd373, fd416;
sub.f64 fd454, fd418, fd417;
st.shared.v2.f64 [r17+720], {fd453, fd454};
fma.rn.f64 fd455, fd421, fd342, fd424;
sub.f64 fd456, fd426, fd425;
st.shared.v2.f64 [r17+864], {fd455, fd456};
fma.rn.f64 fd457, fd429, fd358, fd432;
sub.f64 fd458, fd434, fd433;
st.shared.v2.f64 [r17+1008], {fd457, fd458};
fma.rn.f64 fd459, fd437, fd374, fd440;
sub.f64 fd460, fd442, fd441;
st.shared.v2.f64 [r17+1152], {fd459, fd460};
barrier.sync 0;
ld.shared.v2.f64 {fd461, fd462}, [r11];
ld.shared.v2.f64 {fd465, fd466}, [r11+11664];
ld.shared.v2.f64 {fd469, fd470}, [r11+23328];
ld.shared.v2.f64 {fd473, fd474}, [r11+34992];
ld.shared.v2.f64 {fd477, fd478}, [r11+46656];
ld.shared.v2.f64 {fd481, fd482}, [r11+58320];
ld.shared.v2.f64 {fd485, fd486}, [r11+69984];
ld.shared.v2.f64 {fd489, fd490}, [r11+81648];
ld.shared.v2.f64 {fd493, fd494}, [r11+93312];
add.f64 fd497, fd473, fd485;
add.f64 fd498, fd461, fd497;
add.f64 fd499, fd474, fd486;
add.f64 fd500, fd462, fd499;
mul.f64 fd501, fd497, 0d3FE0000000000000;
sub.f64 fd502, fd461, fd501;
sub.f64 fd503, fd474, fd486;
mul.f64 fd504, fd503, 0dBFEBB67AE8584CAA;
add.f64 fd505, fd504, fd502;
sub.f64 fd506, fd502, fd504;
mul.f64 fd507, fd499, 0d3FE0000000000000;
sub.f64 fd508, fd462, fd507;
sub.f64 fd509, fd473, fd485;
mul.f64 fd510, fd509, 0dBFEBB67AE8584CAA;
sub.f64 fd511, fd508, fd510;
add.f64 fd512, fd510, fd508;
add.f64 fd513, fd477, fd489;
add.f64 fd514, fd465, fd513;
add.f64 fd515, fd478, fd490;
add.f64 fd516, fd466, fd515;
mul.f64 fd517, fd513, 0d3FE0000000000000;
sub.f64 fd518, fd465, fd517;
sub.f64 fd519, fd478, fd490;
mul.f64 fd520, fd519, 0dBFEBB67AE8584CAA;
add.f64 fd521, fd520, fd518;
sub.f64 fd522, fd518, fd520;
mul.f64 fd523, fd515, 0d3FE0000000000000;
sub.f64 fd524, fd466, fd523;
sub.f64 fd525, fd477, fd489;
mul.f64 fd526, fd525, 0dBFEBB67AE8584CAA;
sub.f64 fd527, fd524, fd526;
add.f64 fd528, fd526, fd524;
add.f64 fd529, fd481, fd493;
add.f64 fd530, fd469, fd529;
add.f64 fd531, fd482, fd494;
add.f64 fd532, fd470, fd531;
mul.f64 fd533, fd529, 0d3FE0000000000000;
sub.f64 fd534, fd469, fd533;
sub.f64 fd535, fd482, fd494;
mul.f64 fd536, fd535, 0dBFEBB67AE8584CAA;
add.f64 fd537, fd536, fd534;
sub.f64 fd538, fd534, fd536;
mul.f64 fd539, fd531, 0d3FE0000000000000;
sub.f64 fd540, fd470, fd539;
sub.f64 fd541, fd481, fd493;
mul.f64 fd542, fd541, 0dBFEBB67AE8584CAA;
sub.f64 fd543, fd540, fd542;
add.f64 fd544, fd542, fd540;
mul.f64 fd545, fd521, 0d3FE8836FA2CF5039;
mul.f64 fd546, fd527, 0d3FE491B7523C161D;
sub.f64 fd547, fd545, fd546;
mul.f64 fd548, fd527, 0d3FE8836FA2CF5039;
fma.rn.f64 fd549, fd521, 0d3FE491B7523C161D, fd548;
mul.f64 fd550, fd537, 0d3FC63A1A7E0B738A;
mul.f64 fd551, fd543, 0d3FEF838B8C811C17;
sub.f64 fd552, fd550, fd551;
mul.f64 fd553, fd543, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd554, fd537, 0d3FEF838B8C811C17, fd553;
mul.f64 fd555, fd522, 0d3FC63A1A7E0B738A;
mul.f64 fd556, fd528, 0d3FEF838B8C811C17;
sub.f64 fd557, fd555, fd556;
mul.f64 fd558, fd528, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd559, fd522, 0d3FEF838B8C811C17, fd558;
mul.f64 fd560, fd538, 0dBFEE11F642522D1C;
mul.f64 fd561, fd544, 0d3FD5E3A8748A0BF5;
sub.f64 fd562, fd560, fd561;
mul.f64 fd563, fd544, 0dBFEE11F642522D1C;
fma.rn.f64 fd564, fd538, 0d3FD5E3A8748A0BF5, fd563;
add.f64 fd565, fd514, fd530;
add.f64 fd566, fd516, fd532;
mul.f64 fd567, fd565, 0d3FE0000000000000;
sub.f64 fd568, fd498, fd567;
sub.f64 fd569, fd516, fd532;
mul.f64 fd570, fd569, 0dBFEBB67AE8584CAA;
add.f64 fd571, fd570, fd568;
sub.f64 fd572, fd568, fd570;
mul.f64 fd573, fd566, 0d3FE0000000000000;
sub.f64 fd574, fd500, fd573;
sub.f64 fd575, fd514, fd530;
mul.f64 fd576, fd575, 0dBFEBB67AE8584CAA;
sub.f64 fd577, fd574, fd576;
add.f64 fd578, fd576, fd574;
add.f64 fd579, fd547, fd552;
add.f64 fd580, fd505, fd579;
add.f64 fd581, fd549, fd554;
add.f64 fd582, fd511, fd581;
mul.f64 fd583, fd579, 0d3FE0000000000000;
sub.f64 fd584, fd505, fd583;
sub.f64 fd585, fd549, fd554;
mul.f64 fd586, fd585, 0dBFEBB67AE8584CAA;
add.f64 fd587, fd586, fd584;
sub.f64 fd588, fd584, fd586;
mul.f64 fd589, fd581, 0d3FE0000000000000;
sub.f64 fd590, fd511, fd589;
sub.f64 fd591, fd547, fd552;
mul.f64 fd592, fd591, 0dBFEBB67AE8584CAA;
sub.f64 fd593, fd590, fd592;
add.f64 fd594, fd592, fd590;
add.f64 fd595, fd557, fd562;
add.f64 fd596, fd506, fd595;
add.f64 fd597, fd559, fd564;
add.f64 fd598, fd512, fd597;
mul.f64 fd599, fd595, 0d3FE0000000000000;
sub.f64 fd600, fd506, fd599;
sub.f64 fd601, fd559, fd564;
mul.f64 fd602, fd601, 0dBFEBB67AE8584CAA;
add.f64 fd603, fd602, fd600;
sub.f64 fd604, fd600, fd602;
mul.f64 fd605, fd597, 0d3FE0000000000000;
sub.f64 fd606, fd512, fd605;
sub.f64 fd607, fd557, fd562;
mul.f64 fd608, fd607, 0dBFEBB67AE8584CAA;
sub.f64 fd609, fd606, fd608;
add.f64 fd610, fd608, fd606;
mul.wide.u32 rd12, r7, -901412889;
shr.u64 rd13, rd12, 38;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 81;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 16;
mov.u64 rd15, %21;
add.s64 rd16, rd15, rd14;
ld.global.v2.f64 {fd611, fd612}, [rd16];
mul.f64 fd615, fd582, fd612;
mul.f64 fd616, fd580, fd612;
mul.f64 fd617, fd611, fd582;
mul.f64 fd618, fd611, fd611;
mul.f64 fd619, fd612, fd612;
sub.f64 fd620, fd618, fd619;
mul.f64 fd621, fd612, fd611;
fma.rn.f64 fd622, fd612, fd611, fd621;
mul.f64 fd623, fd598, fd622;
mul.f64 fd624, fd596, fd622;
mul.f64 fd625, fd620, fd598;
mul.f64 fd626, fd611, fd620;
mul.f64 fd627, fd612, fd622;
sub.f64 fd628, fd626, fd627;
mul.f64 fd629, fd611, fd622;
fma.rn.f64 fd630, fd612, fd620, fd629;
mul.f64 fd631, fd577, fd630;
mul.f64 fd632, fd571, fd630;
mul.f64 fd633, fd628, fd577;
mul.f64 fd634, fd611, fd628;
mul.f64 fd635, fd612, fd630;
sub.f64 fd636, fd634, fd635;
mul.f64 fd637, fd611, fd630;
fma.rn.f64 fd638, fd612, fd628, fd637;
mul.f64 fd639, fd593, fd638;
mul.f64 fd640, fd587, fd638;
mul.f64 fd641, fd636, fd593;
ld.global.v2.f64 {fd642, fd643}, [rd16+144];
mul.f64 fd646, fd609, fd643;
mul.f64 fd647, fd603, fd643;
mul.f64 fd648, fd642, fd609;
mul.f64 fd649, fd611, fd642;
mul.f64 fd650, fd612, fd643;
sub.f64 fd651, fd649, fd650;
mul.f64 fd652, fd611, fd643;
fma.rn.f64 fd653, fd612, fd642, fd652;
mul.f64 fd654, fd578, fd653;
mul.f64 fd655, fd572, fd653;
mul.f64 fd656, fd651, fd578;
mul.f64 fd657, fd611, fd651;
mul.f64 fd658, fd612, fd653;
sub.f64 fd659, fd657, fd658;
mul.f64 fd660, fd611, fd653;
fma.rn.f64 fd661, fd612, fd651, fd660;
mul.f64 fd662, fd594, fd661;
mul.f64 fd663, fd588, fd661;
mul.f64 fd664, fd659, fd594;
mul.f64 fd665, fd611, fd659;
mul.f64 fd666, fd612, fd661;
sub.f64 fd667, fd665, fd666;
mul.f64 fd668, fd611, fd661;
fma.rn.f64 fd669, fd612, fd659, fd668;
mul.f64 fd670, fd610, fd669;
mul.f64 fd671, fd604, fd669;
mul.f64 fd672, fd667, fd610;
shl.b32 r21, r20, 4;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 11664, r22;
add.f64 fd673, fd500, fd566;
add.f64 fd674, fd498, fd565;
st.shared.v2.f64 [r23], {fd674, fd673};
fma.rn.f64 fd675, fd611, fd580, fd615;
sub.f64 fd676, fd617, fd616;
st.shared.v2.f64 [r23+1296], {fd675, fd676};
fma.rn.f64 fd677, fd620, fd596, fd623;
sub.f64 fd678, fd625, fd624;
st.shared.v2.f64 [r23+2592], {fd677, fd678};
fma.rn.f64 fd679, fd628, fd571, fd631;
sub.f64 fd680, fd633, fd632;
st.shared.v2.f64 [r23+3888], {fd679, fd680};
fma.rn.f64 fd681, fd636, fd587, fd639;
sub.f64 fd682, fd641, fd640;
st.shared.v2.f64 [r23+5184], {fd681, fd682};
fma.rn.f64 fd683, fd642, fd603, fd646;
sub.f64 fd684, fd648, fd647;
st.shared.v2.f64 [r23+6480], {fd683, fd684};
fma.rn.f64 fd685, fd651, fd572, fd654;
sub.f64 fd686, fd656, fd655;
st.shared.v2.f64 [r23+7776], {fd685, fd686};
fma.rn.f64 fd687, fd659, fd588, fd662;
sub.f64 fd688, fd664, fd663;
st.shared.v2.f64 [r23+9072], {fd687, fd688};
fma.rn.f64 fd689, fd667, fd604, fd670;
sub.f64 fd690, fd672, fd671;
st.shared.v2.f64 [r23+10368], {fd689, fd690};
barrier.sync 0;
ld.shared.v2.f64 {fd691, fd692}, [r11];
ld.shared.v2.f64 {fd695, fd696}, [r11+11664];
ld.shared.v2.f64 {fd699, fd700}, [r11+23328];
ld.shared.v2.f64 {fd703, fd704}, [r11+34992];
ld.shared.v2.f64 {fd707, fd708}, [r11+46656];
ld.shared.v2.f64 {fd711, fd712}, [r11+58320];
ld.shared.v2.f64 {fd715, fd716}, [r11+69984];
ld.shared.v2.f64 {fd719, fd720}, [r11+81648];
ld.shared.v2.f64 {fd723, fd724}, [r11+93312];
add.f64 fd727, fd703, fd715;
add.f64 fd728, fd691, fd727;
add.f64 fd729, fd704, fd716;
add.f64 fd730, fd692, fd729;
mul.f64 fd731, fd727, 0d3FE0000000000000;
sub.f64 fd732, fd691, fd731;
sub.f64 fd733, fd704, fd716;
mul.f64 fd734, fd733, 0dBFEBB67AE8584CAA;
add.f64 fd735, fd734, fd732;
sub.f64 fd736, fd732, fd734;
mul.f64 fd737, fd729, 0d3FE0000000000000;
sub.f64 fd738, fd692, fd737;
sub.f64 fd739, fd703, fd715;
mul.f64 fd740, fd739, 0dBFEBB67AE8584CAA;
sub.f64 fd741, fd738, fd740;
add.f64 fd742, fd740, fd738;
add.f64 fd743, fd707, fd719;
add.f64 fd744, fd695, fd743;
add.f64 fd745, fd708, fd720;
add.f64 fd746, fd696, fd745;
mul.f64 fd747, fd743, 0d3FE0000000000000;
sub.f64 fd748, fd695, fd747;
sub.f64 fd749, fd708, fd720;
mul.f64 fd750, fd749, 0dBFEBB67AE8584CAA;
add.f64 fd751, fd750, fd748;
sub.f64 fd752, fd748, fd750;
mul.f64 fd753, fd745, 0d3FE0000000000000;
sub.f64 fd754, fd696, fd753;
sub.f64 fd755, fd707, fd719;
mul.f64 fd756, fd755, 0dBFEBB67AE8584CAA;
sub.f64 fd757, fd754, fd756;
add.f64 fd758, fd756, fd754;
add.f64 fd759, fd711, fd723;
add.f64 fd760, fd699, fd759;
add.f64 fd761, fd712, fd724;
add.f64 fd762, fd700, fd761;
mul.f64 fd763, fd759, 0d3FE0000000000000;
sub.f64 fd764, fd699, fd763;
sub.f64 fd765, fd712, fd724;
mul.f64 fd766, fd765, 0dBFEBB67AE8584CAA;
add.f64 fd767, fd766, fd764;
sub.f64 fd768, fd764, fd766;
mul.f64 fd769, fd761, 0d3FE0000000000000;
sub.f64 fd770, fd700, fd769;
sub.f64 fd771, fd711, fd723;
mul.f64 fd772, fd771, 0dBFEBB67AE8584CAA;
sub.f64 fd773, fd770, fd772;
add.f64 fd774, fd772, fd770;
mul.f64 fd775, fd751, 0d3FE8836FA2CF5039;
mul.f64 fd776, fd757, 0d3FE491B7523C161D;
sub.f64 fd777, fd775, fd776;
mul.f64 fd778, fd757, 0d3FE8836FA2CF5039;
fma.rn.f64 fd779, fd751, 0d3FE491B7523C161D, fd778;
mul.f64 fd780, fd767, 0d3FC63A1A7E0B738A;
mul.f64 fd781, fd773, 0d3FEF838B8C811C17;
sub.f64 fd782, fd780, fd781;
mul.f64 fd783, fd773, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd784, fd767, 0d3FEF838B8C811C17, fd783;
mul.f64 fd785, fd752, 0d3FC63A1A7E0B738A;
mul.f64 fd786, fd758, 0d3FEF838B8C811C17;
sub.f64 fd787, fd785, fd786;
mul.f64 fd788, fd758, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd789, fd752, 0d3FEF838B8C811C17, fd788;
mul.f64 fd790, fd768, 0dBFEE11F642522D1C;
mul.f64 fd791, fd774, 0d3FD5E3A8748A0BF5;
sub.f64 fd792, fd790, fd791;
mul.f64 fd793, fd774, 0dBFEE11F642522D1C;
fma.rn.f64 fd794, fd768, 0d3FD5E3A8748A0BF5, fd793;
add.f64 fd795, fd744, fd760;
add.f64 fd796, fd746, fd762;
mul.f64 fd797, fd795, 0d3FE0000000000000;
sub.f64 fd798, fd728, fd797;
sub.f64 fd799, fd746, fd762;
mul.f64 fd800, fd799, 0dBFEBB67AE8584CAA;
mul.f64 fd801, fd796, 0d3FE0000000000000;
sub.f64 fd802, fd730, fd801;
sub.f64 fd803, fd744, fd760;
mul.f64 fd804, fd803, 0dBFEBB67AE8584CAA;
add.f64 fd805, fd777, fd782;
add.f64 fd806, fd779, fd784;
mul.f64 fd807, fd805, 0d3FE0000000000000;
sub.f64 fd808, fd735, fd807;
sub.f64 fd809, fd779, fd784;
mul.f64 fd810, fd809, 0dBFEBB67AE8584CAA;
mul.f64 fd811, fd806, 0d3FE0000000000000;
sub.f64 fd812, fd741, fd811;
sub.f64 fd813, fd777, fd782;
mul.f64 fd814, fd813, 0dBFEBB67AE8584CAA;
add.f64 fd815, fd787, fd792;
add.f64 fd816, fd789, fd794;
mul.f64 fd817, fd815, 0d3FE0000000000000;
sub.f64 fd818, fd736, fd817;
sub.f64 fd819, fd789, fd794;
mul.f64 fd820, fd819, 0dBFEBB67AE8584CAA;
mul.f64 fd821, fd816, 0d3FE0000000000000;
sub.f64 fd822, fd742, fd821;
sub.f64 fd823, fd787, fd792;
mul.f64 fd824, fd823, 0dBFEBB67AE8584CAA;
add.f64 %1, fd730, fd796;
add.f64 %0, fd728, fd795;
add.f64 %3, fd741, fd806;
add.f64 %2, fd735, fd805;
add.f64 %5, fd742, fd816;
add.f64 %4, fd736, fd815;
sub.f64 %7, fd802, fd804;
add.f64 %6, fd800, fd798;
sub.f64 %9, fd812, fd814;
add.f64 %8, fd810, fd808;
sub.f64 %11, fd822, fd824;
add.f64 %10, fd820, fd818;
add.f64 %13, fd804, fd802;
sub.f64 %12, fd798, fd800;
add.f64 %15, fd814, fd812;
sub.f64 %14, fd808, fd810;
add.f64 %17, fd824, fd822;
sub.f64 %16, fd818, fd820;
})"
// Operand map: %0-%17 are the outputs rmem[0..8].{x,y}; %18 is smem;
// %19-%21 are the lookup tables lut_dp_9_6561 / lut_dp_9_729 / lut_dp_9_81;
// %22-%44 are the inputs rmem[0..8]. Several .y inputs are listed twice; the
// PTX above never references %25, %28, %33, %36 or %41, so the operand
// positions must be kept exactly as they are for the numbering to stay valid.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y), "=d"(rmem[5].x), "=d"(rmem[5].y), "=d"(rmem[6].x), "=d"(rmem[6].y), "=d"(rmem[7].x), "=d"(rmem[7].y), "=d"(rmem[8].x), "=d"(rmem[8].y): "r"(smem), "l"(lut_dp_9_6561), "l"(lut_dp_9_729), "l"(lut_dp_9_81), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y), "d"(rmem[4].y), "d"(rmem[5].x), "d"(rmem[5].y), "d"(rmem[5].y), "d"(rmem[6].x), "d"(rmem[6].y), "d"(rmem[7].x), "d"(rmem[7].y), "d"(rmem[7].y), "d"(rmem[8].x), "d"(rmem[8].y));
};




// Explicit specialization cufftdx_private_function<702, double, 1>.
// The whole body is one inline-PTX statement that transforms the nine
// complex<double> values held by the calling thread.
//
// Parameters:
//   rmem - nine complex values rmem[0..8]; every .x/.y is passed as an input
//          operand and is overwritten as an output operand.
//   smem - 32-bit value used as the base address of the st.shared/ld.shared
//          accesses.
//
// Shared-memory base used by a thread:
//   smem + (%tid.y + %tid.x / 729) * 52488 bytes, with t = %tid.x % 729
//   selecting the slot inside that region (52488 = 6561 * 8).
//   The divisions by 729, 9 and 81 are done with mul.wide.u32 + shr.u64
//   reciprocal-multiply pairs rather than div instructions.
//
// The PTX runs the same 9-point butterfly four times. Each butterfly is built
// from 3-point butterflies (constants 0.5 and approx. -0.8660254) plus three
// fixed complex factors, approx. (0.7660, 0.6428), (0.1736, 0.9848) and
// (-0.9397, 0.3420) — presumably cos/sin of 40, 80 and 160 degrees; the
// include guard names this file as the inverse 6561-point FP64 transform.
//   Pass 1: butterfly on the register inputs. Results 1..8 are then scaled by
//           factors derived from two 16-byte entries of lut_dp_9_6561
//           (entry t and the entry 11664 bytes after it); higher powers are
//           formed by repeated complex multiplication, and each product has
//           the form x * conj(w). Values are stored to 9 consecutive 8-byte
//           slots at base + t*72.
//   Pass 2: same scheme with lut_dp_9_729, entry t / 9 and the entry 1296
//           bytes after it; stores use a 72-byte stride starting at
//           base + (t % 9)*8 + (t / 9)*648.
//   Pass 3: same scheme with lut_dp_9_81, entry t / 81 and the entry 144
//           bytes after it; stores use a 648-byte stride starting at
//           base + (t % 81)*8 + (t / 81)*5832.
//   Pass 4: butterfly only; results are written straight to the outputs.
// After passes 1-3 every thread reloads nine values from base + t*8 with a
// 5832-byte stride. The values derived from the .x components and those
// derived from the .y components are exchanged in two separate rounds that
// reuse the same shared-memory locations, each round fenced by barriers.
//
// NOTE(review): the block executes `barrier.sync 0` twelve times, so it
// presumably must be reached by all threads of the block in uniform control
// flow — confirm against the caller.
template<> __forceinline__ __device__ void cufftdx_private_function<702, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<24>;
.reg .f64 fd<789>;
.reg .b64 rd<17>;
mov.u32 r1, %tid.y;
mov.u32 r2, %18;
mad.lo.s32 r3, r1, 52488, r2;
add.f64 fd37, %30, %38;
add.f64 fd38, %22, fd37;
add.f64 fd39, %31, %39;
add.f64 fd40, %23, fd39;
mul.f64 fd41, fd37, 0d3FE0000000000000;
sub.f64 fd42, %22, fd41;
sub.f64 fd43, %31, %39;
mul.f64 fd44, fd43, 0dBFEBB67AE8584CAA;
add.f64 fd45, fd44, fd42;
sub.f64 fd46, fd42, fd44;
mul.f64 fd47, fd39, 0d3FE0000000000000;
sub.f64 fd48, %23, fd47;
sub.f64 fd49, %30, %38;
mul.f64 fd50, fd49, 0dBFEBB67AE8584CAA;
sub.f64 fd51, fd48, fd50;
add.f64 fd52, fd50, fd48;
add.f64 fd53, %32, %40;
add.f64 fd54, %24, fd53;
add.f64 fd55, %34, %42;
add.f64 fd56, %26, fd55;
mul.f64 fd57, fd53, 0d3FE0000000000000;
sub.f64 fd58, %24, fd57;
sub.f64 fd59, %34, %42;
mul.f64 fd60, fd59, 0dBFEBB67AE8584CAA;
add.f64 fd61, fd60, fd58;
sub.f64 fd62, fd58, fd60;
mul.f64 fd63, fd55, 0d3FE0000000000000;
sub.f64 fd64, %26, fd63;
sub.f64 fd65, %32, %40;
mul.f64 fd66, fd65, 0dBFEBB67AE8584CAA;
sub.f64 fd67, fd64, fd66;
add.f64 fd68, fd66, fd64;
add.f64 fd69, %35, %43;
add.f64 fd70, %27, fd69;
add.f64 fd71, %37, %44;
add.f64 fd72, %29, fd71;
mul.f64 fd73, fd69, 0d3FE0000000000000;
sub.f64 fd74, %27, fd73;
sub.f64 fd75, %37, %44;
mul.f64 fd76, fd75, 0dBFEBB67AE8584CAA;
add.f64 fd77, fd76, fd74;
sub.f64 fd78, fd74, fd76;
mul.f64 fd79, fd71, 0d3FE0000000000000;
sub.f64 fd80, %29, fd79;
sub.f64 fd81, %35, %43;
mul.f64 fd82, fd81, 0dBFEBB67AE8584CAA;
sub.f64 fd83, fd80, fd82;
add.f64 fd84, fd82, fd80;
mov.u32 r4, %tid.x;
mul.f64 fd85, fd61, 0d3FE8836FA2CF5039;
mul.f64 fd86, fd67, 0d3FE491B7523C161D;
sub.f64 fd87, fd85, fd86;
mul.f64 fd88, fd67, 0d3FE8836FA2CF5039;
fma.rn.f64 fd89, fd61, 0d3FE491B7523C161D, fd88;
mul.f64 fd90, fd77, 0d3FC63A1A7E0B738A;
mul.f64 fd91, fd83, 0d3FEF838B8C811C17;
sub.f64 fd92, fd90, fd91;
mul.f64 fd93, fd83, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd94, fd77, 0d3FEF838B8C811C17, fd93;
mul.f64 fd95, fd62, 0d3FC63A1A7E0B738A;
mul.f64 fd96, fd68, 0d3FEF838B8C811C17;
sub.f64 fd97, fd95, fd96;
mul.f64 fd98, fd68, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd99, fd62, 0d3FEF838B8C811C17, fd98;
mul.f64 fd100, fd78, 0dBFEE11F642522D1C;
mul.f64 fd101, fd84, 0d3FD5E3A8748A0BF5;
sub.f64 fd102, fd100, fd101;
mul.f64 fd103, fd84, 0dBFEE11F642522D1C;
fma.rn.f64 fd104, fd78, 0d3FD5E3A8748A0BF5, fd103;
add.f64 fd105, fd54, fd70;
add.f64 fd106, fd38, fd105;
add.f64 fd107, fd56, fd72;
add.f64 fd108, fd40, fd107;
mul.f64 fd109, fd105, 0d3FE0000000000000;
sub.f64 fd110, fd38, fd109;
sub.f64 fd111, fd56, fd72;
mul.f64 fd112, fd111, 0dBFEBB67AE8584CAA;
add.f64 fd113, fd112, fd110;
sub.f64 fd114, fd110, fd112;
mul.f64 fd115, fd107, 0d3FE0000000000000;
sub.f64 fd116, fd40, fd115;
sub.f64 fd117, fd54, fd70;
mul.f64 fd118, fd117, 0dBFEBB67AE8584CAA;
sub.f64 fd119, fd116, fd118;
add.f64 fd120, fd118, fd116;
add.f64 fd121, fd87, fd92;
add.f64 fd122, fd45, fd121;
add.f64 fd123, fd89, fd94;
add.f64 fd124, fd51, fd123;
mul.f64 fd125, fd121, 0d3FE0000000000000;
sub.f64 fd126, fd45, fd125;
sub.f64 fd127, fd89, fd94;
mul.f64 fd128, fd127, 0dBFEBB67AE8584CAA;
add.f64 fd129, fd128, fd126;
sub.f64 fd130, fd126, fd128;
mul.f64 fd131, fd123, 0d3FE0000000000000;
sub.f64 fd132, fd51, fd131;
sub.f64 fd133, fd87, fd92;
mul.f64 fd134, fd133, 0dBFEBB67AE8584CAA;
sub.f64 fd135, fd132, fd134;
add.f64 fd136, fd134, fd132;
add.f64 fd137, fd97, fd102;
add.f64 fd138, fd46, fd137;
add.f64 fd139, fd99, fd104;
add.f64 fd140, fd52, fd139;
mul.f64 fd141, fd137, 0d3FE0000000000000;
sub.f64 fd142, fd46, fd141;
sub.f64 fd143, fd99, fd104;
mul.f64 fd144, fd143, 0dBFEBB67AE8584CAA;
add.f64 fd145, fd144, fd142;
sub.f64 fd146, fd142, fd144;
mul.f64 fd147, fd139, 0d3FE0000000000000;
sub.f64 fd148, fd52, fd147;
sub.f64 fd149, fd97, fd102;
mul.f64 fd150, fd149, 0dBFEBB67AE8584CAA;
sub.f64 fd151, fd148, fd150;
add.f64 fd152, fd150, fd148;
mul.wide.u32 rd2, r4, 1508246403;
shr.u64 rd3, rd2, 40;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 729;
sub.s32 r7, r4, r6;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %19;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd153, fd154}, [rd6];
mul.f64 fd157, fd124, fd154;
fma.rn.f64 fd158, fd153, fd122, fd157;
mul.f64 fd159, fd122, fd154;
mul.f64 fd160, fd153, fd124;
sub.f64 fd161, fd160, fd159;
mul.f64 fd162, fd153, fd153;
mul.f64 fd163, fd154, fd154;
sub.f64 fd164, fd162, fd163;
mul.f64 fd165, fd154, fd153;
fma.rn.f64 fd166, fd154, fd153, fd165;
mul.f64 fd167, fd140, fd166;
fma.rn.f64 fd168, fd164, fd138, fd167;
mul.f64 fd169, fd138, fd166;
mul.f64 fd170, fd164, fd140;
sub.f64 fd171, fd170, fd169;
mul.f64 fd172, fd153, fd164;
mul.f64 fd173, fd154, fd166;
sub.f64 fd174, fd172, fd173;
mul.f64 fd175, fd153, fd166;
fma.rn.f64 fd176, fd154, fd164, fd175;
mul.f64 fd177, fd119, fd176;
fma.rn.f64 fd178, fd174, fd113, fd177;
mul.f64 fd179, fd113, fd176;
mul.f64 fd180, fd174, fd119;
sub.f64 fd181, fd180, fd179;
mul.f64 fd182, fd153, fd174;
mul.f64 fd183, fd154, fd176;
sub.f64 fd184, fd182, fd183;
mul.f64 fd185, fd153, fd176;
fma.rn.f64 fd186, fd154, fd174, fd185;
mul.f64 fd187, fd135, fd186;
fma.rn.f64 fd188, fd184, fd129, fd187;
mul.f64 fd189, fd129, fd186;
mul.f64 fd190, fd184, fd135;
sub.f64 fd191, fd190, fd189;
ld.global.v2.f64 {fd192, fd193}, [rd6+11664];
mul.f64 fd196, fd151, fd193;
fma.rn.f64 fd197, fd192, fd145, fd196;
mul.f64 fd198, fd145, fd193;
mul.f64 fd199, fd192, fd151;
sub.f64 fd200, fd199, fd198;
mul.f64 fd201, fd153, fd192;
mul.f64 fd202, fd154, fd193;
sub.f64 fd203, fd201, fd202;
mul.f64 fd204, fd153, fd193;
fma.rn.f64 fd205, fd154, fd192, fd204;
mul.f64 fd206, fd120, fd205;
fma.rn.f64 fd207, fd203, fd114, fd206;
mul.f64 fd208, fd114, fd205;
mul.f64 fd209, fd203, fd120;
sub.f64 fd210, fd209, fd208;
mul.f64 fd211, fd153, fd203;
mul.f64 fd212, fd154, fd205;
sub.f64 fd213, fd211, fd212;
mul.f64 fd214, fd153, fd205;
fma.rn.f64 fd215, fd154, fd203, fd214;
mul.f64 fd216, fd136, fd215;
fma.rn.f64 fd217, fd213, fd130, fd216;
mul.f64 fd218, fd130, fd215;
mul.f64 fd219, fd213, fd136;
sub.f64 fd220, fd219, fd218;
mul.f64 fd221, fd153, fd213;
mul.f64 fd222, fd154, fd215;
sub.f64 fd223, fd221, fd222;
mul.f64 fd224, fd153, fd215;
fma.rn.f64 fd225, fd154, fd213, fd224;
mul.f64 fd226, fd152, fd225;
fma.rn.f64 fd227, fd223, fd146, fd226;
mul.f64 fd228, fd146, fd225;
mul.f64 fd229, fd223, fd152;
sub.f64 fd230, fd229, fd228;
mad.lo.s32 r8, r5, 52488, r3;
barrier.sync 0;
mad.lo.s32 r9, r7, 72, r8;
st.shared.f64 [r9], fd106;
st.shared.f64 [r9+8], fd158;
st.shared.f64 [r9+16], fd168;
st.shared.f64 [r9+24], fd178;
st.shared.f64 [r9+32], fd188;
st.shared.f64 [r9+40], fd197;
st.shared.f64 [r9+48], fd207;
st.shared.f64 [r9+56], fd217;
st.shared.f64 [r9+64], fd227;
barrier.sync 0;
shl.b32 r10, r7, 6;
sub.s32 r11, r9, r10;
ld.shared.f64 fd231, [r11];
ld.shared.f64 fd232, [r11+5832];
ld.shared.f64 fd233, [r11+11664];
ld.shared.f64 fd234, [r11+17496];
ld.shared.f64 fd235, [r11+23328];
ld.shared.f64 fd236, [r11+29160];
ld.shared.f64 fd237, [r11+34992];
ld.shared.f64 fd238, [r11+40824];
ld.shared.f64 fd239, [r11+46656];
barrier.sync 0;
st.shared.f64 [r9], fd108;
st.shared.f64 [r9+8], fd161;
st.shared.f64 [r9+16], fd171;
st.shared.f64 [r9+24], fd181;
st.shared.f64 [r9+32], fd191;
st.shared.f64 [r9+40], fd200;
st.shared.f64 [r9+48], fd210;
st.shared.f64 [r9+56], fd220;
st.shared.f64 [r9+64], fd230;
barrier.sync 0;
ld.shared.f64 fd240, [r11];
ld.shared.f64 fd241, [r11+5832];
ld.shared.f64 fd242, [r11+11664];
ld.shared.f64 fd243, [r11+17496];
ld.shared.f64 fd244, [r11+23328];
ld.shared.f64 fd245, [r11+29160];
ld.shared.f64 fd246, [r11+34992];
ld.shared.f64 fd247, [r11+40824];
ld.shared.f64 fd248, [r11+46656];
add.f64 fd249, fd234, fd237;
add.f64 fd250, fd231, fd249;
add.f64 fd251, fd243, fd246;
add.f64 fd252, fd240, fd251;
mul.f64 fd253, fd249, 0d3FE0000000000000;
sub.f64 fd254, fd231, fd253;
sub.f64 fd255, fd243, fd246;
mul.f64 fd256, fd255, 0dBFEBB67AE8584CAA;
add.f64 fd257, fd256, fd254;
sub.f64 fd258, fd254, fd256;
mul.f64 fd259, fd251, 0d3FE0000000000000;
sub.f64 fd260, fd240, fd259;
sub.f64 fd261, fd234, fd237;
mul.f64 fd262, fd261, 0dBFEBB67AE8584CAA;
sub.f64 fd263, fd260, fd262;
add.f64 fd264, fd262, fd260;
add.f64 fd265, fd235, fd238;
add.f64 fd266, fd232, fd265;
add.f64 fd267, fd244, fd247;
add.f64 fd268, fd241, fd267;
mul.f64 fd269, fd265, 0d3FE0000000000000;
sub.f64 fd270, fd232, fd269;
sub.f64 fd271, fd244, fd247;
mul.f64 fd272, fd271, 0dBFEBB67AE8584CAA;
add.f64 fd273, fd272, fd270;
sub.f64 fd274, fd270, fd272;
mul.f64 fd275, fd267, 0d3FE0000000000000;
sub.f64 fd276, fd241, fd275;
sub.f64 fd277, fd235, fd238;
mul.f64 fd278, fd277, 0dBFEBB67AE8584CAA;
sub.f64 fd279, fd276, fd278;
add.f64 fd280, fd278, fd276;
add.f64 fd281, fd236, fd239;
add.f64 fd282, fd233, fd281;
add.f64 fd283, fd245, fd248;
add.f64 fd284, fd242, fd283;
mul.f64 fd285, fd281, 0d3FE0000000000000;
sub.f64 fd286, fd233, fd285;
sub.f64 fd287, fd245, fd248;
mul.f64 fd288, fd287, 0dBFEBB67AE8584CAA;
add.f64 fd289, fd288, fd286;
sub.f64 fd290, fd286, fd288;
mul.f64 fd291, fd283, 0d3FE0000000000000;
sub.f64 fd292, fd242, fd291;
sub.f64 fd293, fd236, fd239;
mul.f64 fd294, fd293, 0dBFEBB67AE8584CAA;
sub.f64 fd295, fd292, fd294;
add.f64 fd296, fd294, fd292;
mul.f64 fd297, fd273, 0d3FE8836FA2CF5039;
mul.f64 fd298, fd279, 0d3FE491B7523C161D;
sub.f64 fd299, fd297, fd298;
mul.f64 fd300, fd279, 0d3FE8836FA2CF5039;
fma.rn.f64 fd301, fd273, 0d3FE491B7523C161D, fd300;
mul.f64 fd302, fd289, 0d3FC63A1A7E0B738A;
mul.f64 fd303, fd295, 0d3FEF838B8C811C17;
sub.f64 fd304, fd302, fd303;
mul.f64 fd305, fd295, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd306, fd289, 0d3FEF838B8C811C17, fd305;
mul.f64 fd307, fd274, 0d3FC63A1A7E0B738A;
mul.f64 fd308, fd280, 0d3FEF838B8C811C17;
sub.f64 fd309, fd307, fd308;
mul.f64 fd310, fd280, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd311, fd274, 0d3FEF838B8C811C17, fd310;
mul.f64 fd312, fd290, 0dBFEE11F642522D1C;
mul.f64 fd313, fd296, 0d3FD5E3A8748A0BF5;
sub.f64 fd314, fd312, fd313;
mul.f64 fd315, fd296, 0dBFEE11F642522D1C;
fma.rn.f64 fd316, fd290, 0d3FD5E3A8748A0BF5, fd315;
add.f64 fd317, fd266, fd282;
add.f64 fd318, fd250, fd317;
add.f64 fd319, fd268, fd284;
add.f64 fd320, fd252, fd319;
mul.f64 fd321, fd317, 0d3FE0000000000000;
sub.f64 fd322, fd250, fd321;
sub.f64 fd323, fd268, fd284;
mul.f64 fd324, fd323, 0dBFEBB67AE8584CAA;
add.f64 fd325, fd324, fd322;
sub.f64 fd326, fd322, fd324;
mul.f64 fd327, fd319, 0d3FE0000000000000;
sub.f64 fd328, fd252, fd327;
sub.f64 fd329, fd266, fd282;
mul.f64 fd330, fd329, 0dBFEBB67AE8584CAA;
sub.f64 fd331, fd328, fd330;
add.f64 fd332, fd330, fd328;
add.f64 fd333, fd299, fd304;
add.f64 fd334, fd257, fd333;
add.f64 fd335, fd301, fd306;
add.f64 fd336, fd263, fd335;
mul.f64 fd337, fd333, 0d3FE0000000000000;
sub.f64 fd338, fd257, fd337;
sub.f64 fd339, fd301, fd306;
mul.f64 fd340, fd339, 0dBFEBB67AE8584CAA;
add.f64 fd341, fd340, fd338;
sub.f64 fd342, fd338, fd340;
mul.f64 fd343, fd335, 0d3FE0000000000000;
sub.f64 fd344, fd263, fd343;
sub.f64 fd345, fd299, fd304;
mul.f64 fd346, fd345, 0dBFEBB67AE8584CAA;
sub.f64 fd347, fd344, fd346;
add.f64 fd348, fd346, fd344;
add.f64 fd349, fd309, fd314;
add.f64 fd350, fd258, fd349;
add.f64 fd351, fd311, fd316;
add.f64 fd352, fd264, fd351;
mul.f64 fd353, fd349, 0d3FE0000000000000;
sub.f64 fd354, fd258, fd353;
sub.f64 fd355, fd311, fd316;
mul.f64 fd356, fd355, 0dBFEBB67AE8584CAA;
add.f64 fd357, fd356, fd354;
sub.f64 fd358, fd354, fd356;
mul.f64 fd359, fd351, 0d3FE0000000000000;
sub.f64 fd360, fd264, fd359;
sub.f64 fd361, fd309, fd314;
mul.f64 fd362, fd361, 0dBFEBB67AE8584CAA;
sub.f64 fd363, fd360, fd362;
add.f64 fd364, fd362, fd360;
mul.wide.u32 rd7, r7, 954437177;
shr.u64 rd8, rd7, 33;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 9;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %20;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd365, fd366}, [rd11];
mul.f64 fd369, fd336, fd366;
fma.rn.f64 fd370, fd365, fd334, fd369;
mul.f64 fd371, fd334, fd366;
mul.f64 fd372, fd365, fd336;
sub.f64 fd373, fd372, fd371;
mul.f64 fd374, fd365, fd365;
mul.f64 fd375, fd366, fd366;
sub.f64 fd376, fd374, fd375;
mul.f64 fd377, fd366, fd365;
fma.rn.f64 fd378, fd366, fd365, fd377;
mul.f64 fd379, fd352, fd378;
fma.rn.f64 fd380, fd376, fd350, fd379;
mul.f64 fd381, fd350, fd378;
mul.f64 fd382, fd376, fd352;
sub.f64 fd383, fd382, fd381;
mul.f64 fd384, fd365, fd376;
mul.f64 fd385, fd366, fd378;
sub.f64 fd386, fd384, fd385;
mul.f64 fd387, fd365, fd378;
fma.rn.f64 fd388, fd366, fd376, fd387;
mul.f64 fd389, fd331, fd388;
fma.rn.f64 fd390, fd386, fd325, fd389;
mul.f64 fd391, fd325, fd388;
mul.f64 fd392, fd386, fd331;
sub.f64 fd393, fd392, fd391;
mul.f64 fd394, fd365, fd386;
mul.f64 fd395, fd366, fd388;
sub.f64 fd396, fd394, fd395;
mul.f64 fd397, fd365, fd388;
fma.rn.f64 fd398, fd366, fd386, fd397;
mul.f64 fd399, fd347, fd398;
fma.rn.f64 fd400, fd396, fd341, fd399;
mul.f64 fd401, fd341, fd398;
mul.f64 fd402, fd396, fd347;
sub.f64 fd403, fd402, fd401;
ld.global.v2.f64 {fd404, fd405}, [rd11+1296];
mul.f64 fd408, fd363, fd405;
fma.rn.f64 fd409, fd404, fd357, fd408;
mul.f64 fd410, fd357, fd405;
mul.f64 fd411, fd404, fd363;
sub.f64 fd412, fd411, fd410;
mul.f64 fd413, fd365, fd404;
mul.f64 fd414, fd366, fd405;
sub.f64 fd415, fd413, fd414;
mul.f64 fd416, fd365, fd405;
fma.rn.f64 fd417, fd366, fd404, fd416;
mul.f64 fd418, fd332, fd417;
fma.rn.f64 fd419, fd415, fd326, fd418;
mul.f64 fd420, fd326, fd417;
mul.f64 fd421, fd415, fd332;
sub.f64 fd422, fd421, fd420;
mul.f64 fd423, fd365, fd415;
mul.f64 fd424, fd366, fd417;
sub.f64 fd425, fd423, fd424;
mul.f64 fd426, fd365, fd417;
fma.rn.f64 fd427, fd366, fd415, fd426;
mul.f64 fd428, fd348, fd427;
fma.rn.f64 fd429, fd425, fd342, fd428;
mul.f64 fd430, fd342, fd427;
mul.f64 fd431, fd425, fd348;
sub.f64 fd432, fd431, fd430;
mul.f64 fd433, fd365, fd425;
mul.f64 fd434, fd366, fd427;
sub.f64 fd435, fd433, fd434;
mul.f64 fd436, fd365, fd427;
fma.rn.f64 fd437, fd366, fd425, fd436;
mul.f64 fd438, fd364, fd437;
fma.rn.f64 fd439, fd435, fd358, fd438;
mul.f64 fd440, fd358, fd437;
mul.f64 fd441, fd435, fd364;
sub.f64 fd442, fd441, fd440;
shl.b32 r15, r14, 3;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 648, r16;
st.shared.f64 [r17], fd318;
st.shared.f64 [r17+72], fd370;
st.shared.f64 [r17+144], fd380;
st.shared.f64 [r17+216], fd390;
st.shared.f64 [r17+288], fd400;
st.shared.f64 [r17+360], fd409;
st.shared.f64 [r17+432], fd419;
st.shared.f64 [r17+504], fd429;
st.shared.f64 [r17+576], fd439;
barrier.sync 0;
ld.shared.f64 fd443, [r11];
ld.shared.f64 fd444, [r11+5832];
ld.shared.f64 fd445, [r11+11664];
ld.shared.f64 fd446, [r11+17496];
ld.shared.f64 fd447, [r11+23328];
ld.shared.f64 fd448, [r11+29160];
ld.shared.f64 fd449, [r11+34992];
ld.shared.f64 fd450, [r11+40824];
ld.shared.f64 fd451, [r11+46656];
barrier.sync 0;
st.shared.f64 [r17], fd320;
st.shared.f64 [r17+72], fd373;
st.shared.f64 [r17+144], fd383;
st.shared.f64 [r17+216], fd393;
st.shared.f64 [r17+288], fd403;
st.shared.f64 [r17+360], fd412;
st.shared.f64 [r17+432], fd422;
st.shared.f64 [r17+504], fd432;
st.shared.f64 [r17+576], fd442;
barrier.sync 0;
ld.shared.f64 fd452, [r11];
ld.shared.f64 fd453, [r11+5832];
ld.shared.f64 fd454, [r11+11664];
ld.shared.f64 fd455, [r11+17496];
ld.shared.f64 fd456, [r11+23328];
ld.shared.f64 fd457, [r11+29160];
ld.shared.f64 fd458, [r11+34992];
ld.shared.f64 fd459, [r11+40824];
ld.shared.f64 fd460, [r11+46656];
add.f64 fd461, fd446, fd449;
add.f64 fd462, fd443, fd461;
add.f64 fd463, fd455, fd458;
add.f64 fd464, fd452, fd463;
mul.f64 fd465, fd461, 0d3FE0000000000000;
sub.f64 fd466, fd443, fd465;
sub.f64 fd467, fd455, fd458;
mul.f64 fd468, fd467, 0dBFEBB67AE8584CAA;
add.f64 fd469, fd468, fd466;
sub.f64 fd470, fd466, fd468;
mul.f64 fd471, fd463, 0d3FE0000000000000;
sub.f64 fd472, fd452, fd471;
sub.f64 fd473, fd446, fd449;
mul.f64 fd474, fd473, 0dBFEBB67AE8584CAA;
sub.f64 fd475, fd472, fd474;
add.f64 fd476, fd474, fd472;
add.f64 fd477, fd447, fd450;
add.f64 fd478, fd444, fd477;
add.f64 fd479, fd456, fd459;
add.f64 fd480, fd453, fd479;
mul.f64 fd481, fd477, 0d3FE0000000000000;
sub.f64 fd482, fd444, fd481;
sub.f64 fd483, fd456, fd459;
mul.f64 fd484, fd483, 0dBFEBB67AE8584CAA;
add.f64 fd485, fd484, fd482;
sub.f64 fd486, fd482, fd484;
mul.f64 fd487, fd479, 0d3FE0000000000000;
sub.f64 fd488, fd453, fd487;
sub.f64 fd489, fd447, fd450;
mul.f64 fd490, fd489, 0dBFEBB67AE8584CAA;
sub.f64 fd491, fd488, fd490;
add.f64 fd492, fd490, fd488;
add.f64 fd493, fd448, fd451;
add.f64 fd494, fd445, fd493;
add.f64 fd495, fd457, fd460;
add.f64 fd496, fd454, fd495;
mul.f64 fd497, fd493, 0d3FE0000000000000;
sub.f64 fd498, fd445, fd497;
sub.f64 fd499, fd457, fd460;
mul.f64 fd500, fd499, 0dBFEBB67AE8584CAA;
add.f64 fd501, fd500, fd498;
sub.f64 fd502, fd498, fd500;
mul.f64 fd503, fd495, 0d3FE0000000000000;
sub.f64 fd504, fd454, fd503;
sub.f64 fd505, fd448, fd451;
mul.f64 fd506, fd505, 0dBFEBB67AE8584CAA;
sub.f64 fd507, fd504, fd506;
add.f64 fd508, fd506, fd504;
mul.f64 fd509, fd485, 0d3FE8836FA2CF5039;
mul.f64 fd510, fd491, 0d3FE491B7523C161D;
sub.f64 fd511, fd509, fd510;
mul.f64 fd512, fd491, 0d3FE8836FA2CF5039;
fma.rn.f64 fd513, fd485, 0d3FE491B7523C161D, fd512;
mul.f64 fd514, fd501, 0d3FC63A1A7E0B738A;
mul.f64 fd515, fd507, 0d3FEF838B8C811C17;
sub.f64 fd516, fd514, fd515;
mul.f64 fd517, fd507, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd518, fd501, 0d3FEF838B8C811C17, fd517;
mul.f64 fd519, fd486, 0d3FC63A1A7E0B738A;
mul.f64 fd520, fd492, 0d3FEF838B8C811C17;
sub.f64 fd521, fd519, fd520;
mul.f64 fd522, fd492, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd523, fd486, 0d3FEF838B8C811C17, fd522;
mul.f64 fd524, fd502, 0dBFEE11F642522D1C;
mul.f64 fd525, fd508, 0d3FD5E3A8748A0BF5;
sub.f64 fd526, fd524, fd525;
mul.f64 fd527, fd508, 0dBFEE11F642522D1C;
fma.rn.f64 fd528, fd502, 0d3FD5E3A8748A0BF5, fd527;
add.f64 fd529, fd478, fd494;
add.f64 fd530, fd462, fd529;
add.f64 fd531, fd480, fd496;
add.f64 fd532, fd464, fd531;
mul.f64 fd533, fd529, 0d3FE0000000000000;
sub.f64 fd534, fd462, fd533;
sub.f64 fd535, fd480, fd496;
mul.f64 fd536, fd535, 0dBFEBB67AE8584CAA;
add.f64 fd537, fd536, fd534;
sub.f64 fd538, fd534, fd536;
mul.f64 fd539, fd531, 0d3FE0000000000000;
sub.f64 fd540, fd464, fd539;
sub.f64 fd541, fd478, fd494;
mul.f64 fd542, fd541, 0dBFEBB67AE8584CAA;
sub.f64 fd543, fd540, fd542;
add.f64 fd544, fd542, fd540;
add.f64 fd545, fd511, fd516;
add.f64 fd546, fd469, fd545;
add.f64 fd547, fd513, fd518;
add.f64 fd548, fd475, fd547;
mul.f64 fd549, fd545, 0d3FE0000000000000;
sub.f64 fd550, fd469, fd549;
sub.f64 fd551, fd513, fd518;
mul.f64 fd552, fd551, 0dBFEBB67AE8584CAA;
add.f64 fd553, fd552, fd550;
sub.f64 fd554, fd550, fd552;
mul.f64 fd555, fd547, 0d3FE0000000000000;
sub.f64 fd556, fd475, fd555;
sub.f64 fd557, fd511, fd516;
mul.f64 fd558, fd557, 0dBFEBB67AE8584CAA;
sub.f64 fd559, fd556, fd558;
add.f64 fd560, fd558, fd556;
add.f64 fd561, fd521, fd526;
add.f64 fd562, fd470, fd561;
add.f64 fd563, fd523, fd528;
add.f64 fd564, fd476, fd563;
mul.f64 fd565, fd561, 0d3FE0000000000000;
sub.f64 fd566, fd470, fd565;
sub.f64 fd567, fd523, fd528;
mul.f64 fd568, fd567, 0dBFEBB67AE8584CAA;
add.f64 fd569, fd568, fd566;
sub.f64 fd570, fd566, fd568;
mul.f64 fd571, fd563, 0d3FE0000000000000;
sub.f64 fd572, fd476, fd571;
sub.f64 fd573, fd521, fd526;
mul.f64 fd574, fd573, 0dBFEBB67AE8584CAA;
sub.f64 fd575, fd572, fd574;
add.f64 fd576, fd574, fd572;
mul.wide.u32 rd12, r7, -901412889;
shr.u64 rd13, rd12, 38;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 81;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 16;
mov.u64 rd15, %21;
add.s64 rd16, rd15, rd14;
ld.global.v2.f64 {fd577, fd578}, [rd16];
mul.f64 fd581, fd548, fd578;
fma.rn.f64 fd582, fd577, fd546, fd581;
mul.f64 fd583, fd546, fd578;
mul.f64 fd584, fd577, fd548;
sub.f64 fd585, fd584, fd583;
mul.f64 fd586, fd577, fd577;
mul.f64 fd587, fd578, fd578;
sub.f64 fd588, fd586, fd587;
mul.f64 fd589, fd578, fd577;
fma.rn.f64 fd590, fd578, fd577, fd589;
mul.f64 fd591, fd564, fd590;
fma.rn.f64 fd592, fd588, fd562, fd591;
mul.f64 fd593, fd562, fd590;
mul.f64 fd594, fd588, fd564;
sub.f64 fd595, fd594, fd593;
mul.f64 fd596, fd577, fd588;
mul.f64 fd597, fd578, fd590;
sub.f64 fd598, fd596, fd597;
mul.f64 fd599, fd577, fd590;
fma.rn.f64 fd600, fd578, fd588, fd599;
mul.f64 fd601, fd543, fd600;
fma.rn.f64 fd602, fd598, fd537, fd601;
mul.f64 fd603, fd537, fd600;
mul.f64 fd604, fd598, fd543;
sub.f64 fd605, fd604, fd603;
mul.f64 fd606, fd577, fd598;
mul.f64 fd607, fd578, fd600;
sub.f64 fd608, fd606, fd607;
mul.f64 fd609, fd577, fd600;
fma.rn.f64 fd610, fd578, fd598, fd609;
mul.f64 fd611, fd559, fd610;
fma.rn.f64 fd612, fd608, fd553, fd611;
mul.f64 fd613, fd553, fd610;
mul.f64 fd614, fd608, fd559;
sub.f64 fd615, fd614, fd613;
ld.global.v2.f64 {fd616, fd617}, [rd16+144];
mul.f64 fd620, fd575, fd617;
fma.rn.f64 fd621, fd616, fd569, fd620;
mul.f64 fd622, fd569, fd617;
mul.f64 fd623, fd616, fd575;
sub.f64 fd624, fd623, fd622;
mul.f64 fd625, fd577, fd616;
mul.f64 fd626, fd578, fd617;
sub.f64 fd627, fd625, fd626;
mul.f64 fd628, fd577, fd617;
fma.rn.f64 fd629, fd578, fd616, fd628;
mul.f64 fd630, fd544, fd629;
fma.rn.f64 fd631, fd627, fd538, fd630;
mul.f64 fd632, fd538, fd629;
mul.f64 fd633, fd627, fd544;
sub.f64 fd634, fd633, fd632;
mul.f64 fd635, fd577, fd627;
mul.f64 fd636, fd578, fd629;
sub.f64 fd637, fd635, fd636;
mul.f64 fd638, fd577, fd629;
fma.rn.f64 fd639, fd578, fd627, fd638;
mul.f64 fd640, fd560, fd639;
fma.rn.f64 fd641, fd637, fd554, fd640;
mul.f64 fd642, fd554, fd639;
mul.f64 fd643, fd637, fd560;
sub.f64 fd644, fd643, fd642;
mul.f64 fd645, fd577, fd637;
mul.f64 fd646, fd578, fd639;
sub.f64 fd647, fd645, fd646;
mul.f64 fd648, fd577, fd639;
fma.rn.f64 fd649, fd578, fd637, fd648;
mul.f64 fd650, fd576, fd649;
fma.rn.f64 fd651, fd647, fd570, fd650;
mul.f64 fd652, fd570, fd649;
mul.f64 fd653, fd647, fd576;
sub.f64 fd654, fd653, fd652;
shl.b32 r21, r20, 3;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 5832, r22;
st.shared.f64 [r23], fd530;
st.shared.f64 [r23+648], fd582;
st.shared.f64 [r23+1296], fd592;
st.shared.f64 [r23+1944], fd602;
st.shared.f64 [r23+2592], fd612;
st.shared.f64 [r23+3240], fd621;
st.shared.f64 [r23+3888], fd631;
st.shared.f64 [r23+4536], fd641;
st.shared.f64 [r23+5184], fd651;
barrier.sync 0;
ld.shared.f64 fd655, [r11];
ld.shared.f64 fd656, [r11+5832];
ld.shared.f64 fd657, [r11+11664];
ld.shared.f64 fd658, [r11+17496];
ld.shared.f64 fd659, [r11+23328];
ld.shared.f64 fd660, [r11+29160];
ld.shared.f64 fd661, [r11+34992];
ld.shared.f64 fd662, [r11+40824];
ld.shared.f64 fd663, [r11+46656];
barrier.sync 0;
st.shared.f64 [r23], fd532;
st.shared.f64 [r23+648], fd585;
st.shared.f64 [r23+1296], fd595;
st.shared.f64 [r23+1944], fd605;
st.shared.f64 [r23+2592], fd615;
st.shared.f64 [r23+3240], fd624;
st.shared.f64 [r23+3888], fd634;
st.shared.f64 [r23+4536], fd644;
st.shared.f64 [r23+5184], fd654;
barrier.sync 0;
ld.shared.f64 fd664, [r11];
ld.shared.f64 fd665, [r11+5832];
ld.shared.f64 fd666, [r11+11664];
ld.shared.f64 fd667, [r11+17496];
ld.shared.f64 fd668, [r11+23328];
ld.shared.f64 fd669, [r11+29160];
ld.shared.f64 fd670, [r11+34992];
ld.shared.f64 fd671, [r11+40824];
ld.shared.f64 fd672, [r11+46656];
add.f64 fd673, fd658, fd661;
add.f64 fd674, fd655, fd673;
add.f64 fd675, fd667, fd670;
add.f64 fd676, fd664, fd675;
mul.f64 fd677, fd673, 0d3FE0000000000000;
sub.f64 fd678, fd655, fd677;
sub.f64 fd679, fd667, fd670;
mul.f64 fd680, fd679, 0dBFEBB67AE8584CAA;
add.f64 fd681, fd680, fd678;
sub.f64 fd682, fd678, fd680;
mul.f64 fd683, fd675, 0d3FE0000000000000;
sub.f64 fd684, fd664, fd683;
sub.f64 fd685, fd658, fd661;
mul.f64 fd686, fd685, 0dBFEBB67AE8584CAA;
sub.f64 fd687, fd684, fd686;
add.f64 fd688, fd686, fd684;
add.f64 fd689, fd659, fd662;
add.f64 fd690, fd656, fd689;
add.f64 fd691, fd668, fd671;
add.f64 fd692, fd665, fd691;
mul.f64 fd693, fd689, 0d3FE0000000000000;
sub.f64 fd694, fd656, fd693;
sub.f64 fd695, fd668, fd671;
mul.f64 fd696, fd695, 0dBFEBB67AE8584CAA;
add.f64 fd697, fd696, fd694;
sub.f64 fd698, fd694, fd696;
mul.f64 fd699, fd691, 0d3FE0000000000000;
sub.f64 fd700, fd665, fd699;
sub.f64 fd701, fd659, fd662;
mul.f64 fd702, fd701, 0dBFEBB67AE8584CAA;
sub.f64 fd703, fd700, fd702;
add.f64 fd704, fd702, fd700;
add.f64 fd705, fd660, fd663;
add.f64 fd706, fd657, fd705;
add.f64 fd707, fd669, fd672;
add.f64 fd708, fd666, fd707;
mul.f64 fd709, fd705, 0d3FE0000000000000;
sub.f64 fd710, fd657, fd709;
sub.f64 fd711, fd669, fd672;
mul.f64 fd712, fd711, 0dBFEBB67AE8584CAA;
add.f64 fd713, fd712, fd710;
sub.f64 fd714, fd710, fd712;
mul.f64 fd715, fd707, 0d3FE0000000000000;
sub.f64 fd716, fd666, fd715;
sub.f64 fd717, fd660, fd663;
mul.f64 fd718, fd717, 0dBFEBB67AE8584CAA;
sub.f64 fd719, fd716, fd718;
add.f64 fd720, fd718, fd716;
mul.f64 fd721, fd697, 0d3FE8836FA2CF5039;
mul.f64 fd722, fd703, 0d3FE491B7523C161D;
sub.f64 fd723, fd721, fd722;
mul.f64 fd724, fd703, 0d3FE8836FA2CF5039;
fma.rn.f64 fd725, fd697, 0d3FE491B7523C161D, fd724;
mul.f64 fd726, fd713, 0d3FC63A1A7E0B738A;
mul.f64 fd727, fd719, 0d3FEF838B8C811C17;
sub.f64 fd728, fd726, fd727;
mul.f64 fd729, fd719, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd730, fd713, 0d3FEF838B8C811C17, fd729;
mul.f64 fd731, fd698, 0d3FC63A1A7E0B738A;
mul.f64 fd732, fd704, 0d3FEF838B8C811C17;
sub.f64 fd733, fd731, fd732;
mul.f64 fd734, fd704, 0d3FC63A1A7E0B738A;
fma.rn.f64 fd735, fd698, 0d3FEF838B8C811C17, fd734;
mul.f64 fd736, fd714, 0dBFEE11F642522D1C;
mul.f64 fd737, fd720, 0d3FD5E3A8748A0BF5;
sub.f64 fd738, fd736, fd737;
mul.f64 fd739, fd720, 0dBFEE11F642522D1C;
fma.rn.f64 fd740, fd714, 0d3FD5E3A8748A0BF5, fd739;
add.f64 fd741, fd690, fd706;
add.f64 fd742, fd692, fd708;
mul.f64 fd743, fd741, 0d3FE0000000000000;
sub.f64 fd744, fd674, fd743;
sub.f64 fd745, fd692, fd708;
mul.f64 fd746, fd745, 0dBFEBB67AE8584CAA;
mul.f64 fd747, fd742, 0d3FE0000000000000;
sub.f64 fd748, fd676, fd747;
sub.f64 fd749, fd690, fd706;
mul.f64 fd750, fd749, 0dBFEBB67AE8584CAA;
add.f64 fd751, fd723, fd728;
add.f64 fd752, fd725, fd730;
mul.f64 fd753, fd751, 0d3FE0000000000000;
sub.f64 fd754, fd681, fd753;
sub.f64 fd755, fd725, fd730;
mul.f64 fd756, fd755, 0dBFEBB67AE8584CAA;
mul.f64 fd757, fd752, 0d3FE0000000000000;
sub.f64 fd758, fd687, fd757;
sub.f64 fd759, fd723, fd728;
mul.f64 fd760, fd759, 0dBFEBB67AE8584CAA;
add.f64 fd761, fd733, fd738;
add.f64 fd762, fd735, fd740;
mul.f64 fd763, fd761, 0d3FE0000000000000;
sub.f64 fd764, fd682, fd763;
sub.f64 fd765, fd735, fd740;
mul.f64 fd766, fd765, 0dBFEBB67AE8584CAA;
mul.f64 fd767, fd762, 0d3FE0000000000000;
sub.f64 fd768, fd688, fd767;
sub.f64 fd769, fd733, fd738;
mul.f64 fd770, fd769, 0dBFEBB67AE8584CAA;
add.f64 %0, fd674, fd741;
add.f64 %1, fd676, fd742;
add.f64 %3, fd687, fd752;
add.f64 %2, fd681, fd751;
add.f64 %5, fd688, fd762;
add.f64 %4, fd682, fd761;
add.f64 %6, fd746, fd744;
sub.f64 %7, fd748, fd750;
sub.f64 %9, fd758, fd760;
add.f64 %8, fd756, fd754;
sub.f64 %11, fd768, fd770;
add.f64 %10, fd766, fd764;
sub.f64 %12, fd744, fd746;
add.f64 %13, fd750, fd748;
add.f64 %15, fd760, fd758;
sub.f64 %14, fd754, fd756;
add.f64 %17, fd770, fd768;
sub.f64 %16, fd764, fd766;
})"
     // Operand map:
     //   %0-%17  outputs  rmem[0..8].{x,y}
     //   %18     smem
     //   %19-%21 addresses of lut_dp_9_6561, lut_dp_9_729, lut_dp_9_81
     //   %22-%44 inputs   rmem[0..8].{x,y}
     // rmem[1].y, rmem[2].y, rmem[4].y, rmem[5].y and rmem[7].y are each
     // listed twice: the PTX reads only the second copy (%26, %29, %34, %37,
     // %42) and never references %25, %28, %33, %36 or %41. Removing a
     // duplicate would shift every later operand index.
     // NOTE(review): the statement has no clobber list even though the PTX
     // stores to shared memory — confirm this is intentional.
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y), "=d"(rmem[5].x), "=d"(rmem[5].y), "=d"(rmem[6].x), "=d"(rmem[6].y), "=d"(rmem[7].x), "=d"(rmem[7].y), "=d"(rmem[8].x), "=d"(rmem[8].y): "r"(smem), "l"(lut_dp_9_6561), "l"(lut_dp_9_729), "l"(lut_dp_9_81), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y), "d"(rmem[4].y), "d"(rmem[5].x), "d"(rmem[5].y), "d"(rmem[5].y), "d"(rmem[6].x), "d"(rmem[6].y), "d"(rmem[7].x), "d"(rmem[7].y), "d"(rmem[7].y), "d"(rmem[8].x), "d"(rmem[8].y));
};


#endif
